*------------------------------------------------------------------------------*
							*** SOEP MERGE ***
*------------------------------------------------------------------------------*


/* 
This do-file is intended only for the SOEP remote environment. It can be sent as a single email, but the output listing of the last section (// Combine the outcome dataset with air pollution and weather data) has to be reviewed by a real person. 
All relevant variables are pre-defined in this do-file, so make sure that every variable of interest is included before sending the email. Also note that the first 4 lines of the email must be lines 11-14 of this do-file. 
*/

** user = 				/* Insert here the SOEP remote username */
** password = 			/* Insert here the SOEP remote password */
** package = STATA
** project = GSOEP

*------------------------------------------------------------------------------*

***Paths on remote server where data is stored
* NOTE: as shipped, every value below sits inside a comment, so all three
* globals are left empty. Replace each comment with the real quoted path
* before sending this file.
* mydata_user is concatenated directly with file names further down (for
* example "${mydata_user}airpollution_controls.dta"), so its value has to end
* with a path separator.
global soep36 /* " Insert here pre-specified drive on remote server containing the raw SOEP data (version 36). This should be given to you by the data provider. " */
global mydata /* " Insert here pre-specified drive on remote server corresponding to your personal user drive. This should be given to you by the data provider. " */
global mydata_user /* " ${mydata}/user/ " */

*------------------------------------------------------------------------------*

*** Create SOEP dataset

**Dataset for control variables
* Reads the needed pequiv variables, declares the pid-by-syear panel, derives
* cleaned control variables and saves them as airpollution_controls.dta.
use cid hid pid syear d11101 d11102ll d11104 d11107 d11108 d11109 i11101 i11102 i11103 i11110 l11101 l11102 m11104 m11125 m11126 using "${soep36}/pequiv", clear
xtset pid syear

* Negative codes that are turned into missing for variables without a fixed range
local neg_codes -1,-2,-3,-5,-8

*Socio-economic characteristics
gen female = cond(inrange(d11102ll,1,2), d11102ll==2, .)
gen age = d11101 if !inlist(d11101,`neg_codes')
gen mar_status = cond(inrange(d11104,1,5), d11104, .)
label values mar_status d11104
* married is 1 for status 1, 0 for statuses 2 to 5 and missing otherwise
gen married = cond(mar_status==1, 1, cond(inrange(mar_status,2,5), 0, .))
gen children_hh = d11107 if !inlist(d11107,`neg_codes')
gen hs_educ = cond(inrange(d11108,1,3), d11108, .)
label values hs_educ d11108
gen yrs_educ = d11109 if !inlist(d11109,`neg_codes')

*Income variables
* Target names and their pequiv sources are paired by position.
local inc_targets gross_hh_income n_hh_income hh_labour_income ind_labour_income
local inc_sources i11101 i11102 i11103 i11110
forvalues k = 1/4 {
	local target : word `k' of `inc_targets'
	local source : word `k' of `inc_sources'
	gen `target' = `source' if !inlist(`source',`neg_codes')
}

*Residence variables
gen fed_state = cond(inrange(l11101,1,16), l11101, .)
label define l11101 1 "[1] Schleswig-Holstein 1", modify
label values fed_state l11101
gen east_ger = cond(inrange(l11102,1,2), l11102==2, .)

*Health and sport
gen freq_sports = cond(inrange(m11104,1,4), m11104, .)
label values freq_sports m11104
gen health_sat = cond(inrange(m11125,0,10), m11125, .)
gen sr_health = cond(inrange(m11126,1,5), m11126, .)
label values sr_health m11126

save "${mydata_user}airpollution_controls.dta", replace 


**Dataset for youth economic outcomes
* Reads the youth questionnaire items, keeps only in-range answers in newly
* named variables and saves the result as airpollution_youth.dta.
use pid hid cid syear iyear iday ihour ilang iminute imonth intid jl0221-jl0232 jl0337-jl0348 jl0349 jl0361-jl0363 jl0381-jl0384 using "${soep36}/jugendl.dta", clear

*Youth Future Variables
* Items jl0221 to jl0232 in questionnaire order; answers outside 0-100 become missing.
local prob_names at_fav_uni at_uni at_workplace job_success unemployed limitation_fam selfemployed work_abroad marry with_partner one_child children
local item = 221
foreach stub of local prob_names {
	gen prob_`stub' = jl0`item' if inrange(jl0`item',0,100)
	local ++item
}
label values prob_* jl0221_EN

*Youth Sources of Inequality
* Items jl0337 to jl0348 in questionnaire order; answers outside 1-4 become missing.
local success_names hardwork exploiting intelligence family training money grades ruthless connections political male initiative
local item = 337
foreach stub of local success_names {
	gen success_`stub' = jl0`item' if inrange(jl0`item',1,4)
	local ++item
}
label define success_EN 1 "[1] Completely Agree" 2 "[2] Agree" 3 "[3] Disagree" 4 "[4] Completely Disagree"
label values success_* success_EN

*Youth Risk Willingness
gen risk_taking_sr_y = cond(inrange(jl0349,0,10), jl0349, .)
label define risk_taking_youth 0 "[0] 0 Risk willingness (Risk averse)" 1 "[1] 1 Risk willingness" 2 "[2] 2 Risk willingness" 3 "[3] 3 Risk willingness" 4 "[4] 4 Risk willingness" 5 "[5] 5 Risk willingness" 6 "[6] 6 Risk willingness" 7 "[7] 7 Risk willingness" 8 "[8] 8 Risk willingness" 9 "[9] 9 Risk willingness" 10 "[10] 10 Risk willingness (Risk loving)"
label values risk_taking_sr_y risk_taking_youth
* Indicators for answers 0-4 and 6-10; an answer of 5 is 0 on both
gen risk_averse_y = cond(inrange(jl0349,0,10), inrange(jl0349,0,4), .)
gen risk_loving_y = cond(inrange(jl0349,0,10), inrange(jl0349,6,10), .)

*Youth Trust 
gen trust_people_y = cond(inrange(jl0361,1,7), jl0361, .)
label define trust_youth 1 " [1] 1 Completely Disagree (Scale from 1-7)" 2 "[2] 2 on Scale from 1-7" 3 "[3] 3 on Scale from 1-7" 4 "[4] 4 on Scale from 1-7" 5 "[5] 5 on Scale from 1-7" 6 "[6] 6 on Scale from 1-7" 7 "[7] 7 Completely Agree (Scale from 1-7)"
label values trust_people_y trust_youth
* The next two items are reverse-coded (8 minus the answer)
gen trust_people_now_y = cond(inrange(jl0362,1,7), 8-jl0362, .)
gen distrust_strangers_y = cond(inrange(jl0363,1,7), 8-jl0363, .)
label define distrust_youth 1 " [1] 1 Completely Agree (Scale from 1-7)" 2 "[2] 2 on Scale from 1-7" 3 "[3] 3 on Scale from 1-7" 4 "[4] 4 on Scale from 1-7" 5 "[5] 5 on Scale from 1-7" 6 "[6] 6 on Scale from 1-7" 7 "[7] 7 Completely Disagree (Scale from 1-7)"
label values trust_people_now_y distrust_strangers_y distrust_youth

*Creating a (percentage) measure of trust, where higher values are indicative of greater trust
* The total is only computed when all three items are present (range 3-21),
* then rescaled to the unit interval.
egen trust_y = rowtotal(trust_people_y trust_people_now_y distrust_strangers_y) if !missing(trust_people_y, trust_people_now_y, distrust_strangers_y)
gen p_trust_y = (trust_y-3)/18

*Youth Affective well-being
* Items jl0381 to jl0384; answers outside 1-5 become missing.
local feelings angry worried happy sad
local item = 381
foreach feeling of local feelings {
	gen `feeling'_y = jl0`item' if inrange(jl0`item',1,5)
	local ++item
}
label define affective 1 "[1] Very rarely" 2 "[2] Rarely" 3 "[3] Sometimes" 4 "[4] Often" 5 "[5] Very often"
label values angry_y worried_y happy_y sad_y affective
* Indicators for answers 4 or 5
local item = 381
foreach feeling of local feelings {
	gen `feeling'_dummy_y = inrange(jl0`item',4,5) if inrange(jl0`item',1,5)
	local ++item
}

save "${mydata_user}airpollution_youth.dta", replace 


**Dataset for adult economic outcomes
* Loads the adult questionnaire items that are used in this and the following sections.
use pid cid hid syear iyear pmonin ptagin intid pdatmi pdatst pdauer1 pdauer2 pdauer3 plb0021 plb0022_h ple0026-ple0029 plh0016-plh0026 plh0129 plh0130 plh0134-plh0136 plh0131_* plh0132 plh0133 plh0182 plh0184-plh0187 plh0192-plh0194 plh0195 plh0196 plh0203 plh0204_v2 plh0206i01-plh0206i10 plh0244 plh0253 plh0254 plh0334 plh0007 plh0011_h plh0012_h plh0013_h plh0032-plh0036 using "${soep36}/pl.dta", clear

*Labour force status
gen unemployed = (plb0021==1) if !inlist(plb0021,-1,-2,-3,-5,-8)
* Status 3 is assigned when plb0021 is 2 and plb0022_h is 9; otherwise plb0021 is kept
gen labourforce_status = cond(plb0021==2 & plb0022_h==9, 3, plb0021) if !inlist(plb0021,-1,-2,-3,-5,-8)
label define lfs 1 "[1] Unemployed" 2 "[2] Employed" 3 "[3] Out of labour force"
label values labourforce_status lfs

*Stress and Exhaustion measures
* Items ple0026 to ple0029; answers outside 1-5 become missing.
local stress_names pressed_for_time feel_down well_balanced used_energy
local item = 26
foreach sname of local stress_names {
	gen `sname' = ple00`item' if inrange(ple00`item',1,5)
	local ++item
}
label values pressed_for_time feel_down well_balanced used_energy ple0026_EN
* Indicators for answers 4 or 5
local item = 26
foreach sname of local stress_names {
	gen `sname'_dummy = inrange(ple00`item',4,5) if inrange(ple00`item',1,5)
	local ++item
}

*Social Responsibility (preferences for redistribution)
* Items plh0016 to plh0026 in questionnaire order; answers outside 1-5 become missing.
local pref_names family_finsec care_psc care_sc ue_finsec job_creation sick_finsec help_sick old_finsec help_old rcare_finsec help_rcare
local item = 16
foreach stub of local pref_names {
	gen pref_`stub' = plh00`item' if inrange(plh00`item',1,5)
	local ++item
}
label values pref_* plh0016_EN

*Money donation measure based on actual donation
gen donate_money = cond(inrange(plh0129,1,2), plh0129==1, .)
* Non-donors get 0; donors get their reported amount unless it is a negative code
gen money_donated = cond(donate_money==0, 0, plh0130) if donate_money==0 | (donate_money==1 & !inlist(plh0130,-1,-2,-3,-5,-8))

*Donation of blood
gen blood_donation_past10yrs = cond(inrange(plh0131_v1,1,2), plh0131_v1==1, .)
gen blood_donation_past5yrs = cond(inrange(plh0131_v2,1,2), plh0131_v2==1, .)
* The 10-year item is used for survey year 2010, the 5-year item for 2015
gen blood_donation_general = cond(syear==2010, blood_donation_past10yrs, blood_donation_past5yrs) if inlist(syear,2010,2015)
gen blood_donation_pastyear = cond(inrange(plh0132,1,2), plh0132==1, .)
gen blood_donation_2009 = blood_donation_pastyear if syear==2010
gen blood_donation_2014 = blood_donation_pastyear if syear==2015
* blood_donate is set to missing whenever plh0133 equals 1
gen blood_donate = blood_donation_general if plh0133!=1

*Altruism measure based on hypothetical situation 
* Amounts in the range 0-10000 are expressed as a share of 10000
gen hypo_money_saved = cond(inrange(plh0134,0,10000), plh0134 / 10000, .)
gen hypo_altruism = cond(inrange(plh0135,0,10000), plh0135 / 10000, .)
gen hypo_money_spent = cond(inrange(plh0136,0,10000), plh0136 / 10000, .)

*Current Life Satisfaction 
gen life_satisfaction = cond(inrange(plh0182,0,10), plh0182, .)

*Affective well-being variables
* Items plh0184 to plh0187; answers outside 1-5 become missing.
local feelings angry worried happy sad
local item = 184
foreach feeling of local feelings {
	gen `feeling' = plh0`item' if inrange(plh0`item',1,5)
	local ++item
}
* Indicators for answers 4 or 5
local item = 184
foreach feeling of local feelings {
	gen `feeling'_dummy = inrange(plh0`item',4,5) if inrange(plh0`item',1,5)
	local ++item
}

*Trust measure
* trust_people is reverse-coded (5 minus the answer); the other two items are kept as answered
gen trust_people = cond(inrange(plh0192,1,4), 5-plh0192, .)
label define trust_people 1 " [1] Disagree Completely" 2 "[2] Disagree" 3 "[3] Agree Slightly" 4 "[4] Agree Completely"
label values trust_people trust_people
gen trust_people_now = cond(inrange(plh0193,1,4), plh0193, .)
label values trust_people_now plh0193_EN
gen caution_strangers = cond(inrange(plh0194,1,4), plh0194, .)
label values caution_strangers plh0194_EN

*Creating a (percentage / standardised) measure of trust, where higher values are indicative of greater trust
* The total needs all three items (range 3-12); p_trust rescales it to the unit interval.
egen trust = rowtotal(trust_people trust_people_now caution_strangers) if !missing(trust_people, trust_people_now, caution_strangers)
gen p_trust = (trust-3)/9
egen stdtrust = std(trust)

*Fairness and Trustworthiness
* people_fair is 1 when plh0195 equals 2, 0 when it equals 1, missing otherwise.
gen people_fair = plh0195==2 if inrange(plh0195,1,2)
* people_helpful is 1 when plh0196 equals 1, 0 when it equals 2, missing otherwise.
* The validity filter has to test plh0196 itself: filtering on plh0195 turned
* invalid plh0196 codes into 0 and dropped valid answers whenever plh0195 was invalid.
gen people_helpful = plh0196==1 if inrange(plh0196,1,2)

*Risk willingness from lottery
* risk_taking reverses the 1-6 lottery answer (7 minus the answer)
gen risk_lottery = cond(inrange(plh0203,1,6), plh0203, .)
gen risk_taking = 7 - risk_lottery

*Risk willingness from self-reported question
gen risk_taking_sr = cond(inrange(plh0204_v2,0,10), plh0204_v2, .)
* Indicators for answers 0-4 and 6-10; an answer of 5 is 0 on both
gen risk_averse = cond(inrange(plh0204_v2,0,10), inrange(plh0204_v2,0,4), .)
gen risk_loving = cond(inrange(plh0204_v2,0,10), inrange(plh0204_v2,6,10), .)

*Positive Reciprocity
gen return_favours = cond(inrange(plh0206i01,1,7), plh0206i01, .)
gen selflessness = cond(inrange(plh0206i04,1,7), plh0206i04, .)
gen face_costs_to_help = cond(inrange(plh0206i06,1,7), plh0206i06, .)
label values return_favours selflessness face_costs_to_help plh0206i01_EN

*Create a measure of positive reciprocity expressed as a percentage
* The total needs all three items (range 3-21) and is rescaled to the unit interval in place.
egen positive_reciprocity = rowtotal(return_favours selflessness face_costs_to_help) if !missing(return_favours, selflessness, face_costs_to_help)
replace positive_reciprocity = (positive_reciprocity-3)/18
egen std_p_reciprocity = std(positive_reciprocity)

*Negative Reciprocity
gen take_revenge = cond(inrange(plh0206i02,1,7), plh0206i02, .)
gen payback = cond(inrange(plh0206i03,1,7), plh0206i03, .)
gen offend_back = cond(inrange(plh0206i05,1,7), plh0206i05, .)
label values take_revenge payback offend_back plh0206i02_EN

*Create a percentage measure (transform 3-21 into 0-1 (100%))
egen negative_reciprocity = rowtotal(take_revenge payback offend_back) if !missing(take_revenge, payback, offend_back)
replace negative_reciprocity = (negative_reciprocity-3)/18
egen std_n_reciprocity = std(negative_reciprocity)

*Forgiveness (Tendency to forgive)
* Items i08 and i09 are reverse-coded (8 minus the answer); i07 and i10 are kept as answered
gen fast_forgiver = cond(inrange(plh0206i07,1,7), plh0206i07, .)
gen wrongdoing_thoughts = cond(inrange(plh0206i08,1,7), 8-plh0206i08, .)
gen bear_grudges = cond(inrange(plh0206i09,1,7), 8-plh0206i09, .)
gen forgive_n_forget = cond(inrange(plh0206i10,1,7), plh0206i10, .)
label values fast_forgiver forgive_n_forget plh0206i07_EN
label define forgiveness 1 "[1] Applies Completely, (Scale 1-7)"  2 "[2] 2 On Scale 1-High, 7-Low" 3 "[3] 3 On Scale 1-High, 7-Low" 4 "[4] 4 On Scale 1-High, 7-Low" 5 "[5] 5 On Scale 1-High, 7-Low" 6 "[6] 6 On Scale 1-High, 7-Low" 7 "[7] Does Not Apply At All, (Scale 1-7)"
label values wrongdoing_thoughts bear_grudges forgiveness

*Create a percentage (transform 4-28 --> 0-1 (100%)) and standardised measure of forgiveness
egen forgiveness = rowtotal(fast_forgiver wrongdoing_thoughts forgive_n_forget bear_grudges) if !missing(fast_forgiver, wrongdoing_thoughts, forgive_n_forget, bear_grudges)
gen forgive = (forgiveness-4)/24 
egen stdforgive = std(forgiveness)

*Optimism / Pessimism 
* pessimism_scale keeps plh0244 unless it carries one of the listed negative codes;
* pessimism is an indicator for answers 3 or 4 on that item.
gen pessimism_scale = plh0244 if inlist(plh0244,-1,-2,-3,-5,-8)==0
label values pessimism_scale plh0244_EN
gen pessimism = plh0244==3 | plh0244==4 if inlist(plh0244,-1,-2,-3,-5,-8)==0

*Patience 
gen patience = plh0253 if inrange(plh0253,0,10)
label values patience plh0253_EN
* NOTE(review): patience runs from 0 to 10, so 11-patience yields values 1 to 11.
* A reversal that stays on the 0-10 scale would be 10-patience. Left unchanged
* because downstream analysis may depend on the current values - confirm.
gen impatient = 11-patience if patience!=. 

*Impulsiveness
gen impulsiveness = plh0254 if inrange(plh0254,0,10)
label values impulsiveness plh0254_EN

*Flourishing in Life
gen flourishing = plh0334 if inrange(plh0334,0,10)
* The label name follows the source item plh0334 (previously misspelled plh0034_EN).
* Stata attaches an undefined label name without error, so the typo left the
* variable without value labels and raised no warning.
label values flourishing plh0334_EN

*Worried about economic situation (own and general)
*Worries about environment and own health
* Each indicator is 1 when the item equals 1, 0 for answers 2 or 3, missing otherwise.
* Names and item numbers (plh0032, plh0033, plh0035, plh0036) are paired by position.
local worry_names econ_development own_economic own_health environment
local worry_items 32 33 35 36
forvalues k = 1/4 {
	local wname : word `k' of `worry_names'
	local witem : word `k' of `worry_items'
	gen worries_`wname' = plh00`witem'==1 if inrange(plh00`witem',1,3)
}

save "${mydata_user}airpollution_adult.dta", replace

*------------------------------------------------------------------------------*

*** Combine the datasets of youth and adult outcomes
* The adult file is the master and the youth file is the using dataset,
* matched one-to-one on person (pid) and survey year (syear).
* No keep() option is given, so observations found in only one of the two
* files are retained as well.

use "${mydata_user}airpollution_adult", clear

merge 1:1 pid syear using "${mydata_user}airpollution_youth"
* NOTE(review): soepdrop is not a built-in Stata command; presumably it is the
* remote system's counterpart of drop - confirm with the data provider.
soepdrop _merge

save "${mydata_user}airpollution_all.dta", replace

*------------------------------------------------------------------------------*

*** Create the final dataset to be used for the analysis
* The controls file is the master; the combined outcome file is matched
* one-to-one on person (pid) and survey year (syear).

use "${mydata_user}airpollution_controls", clear

merge 1:1 pid syear using "${mydata_user}airpollution_all"
* Only person-years present in both files (_merge==3) are kept.
* NOTE(review): soepkeep and soepdrop are not built-in Stata commands;
* presumably they are the remote system's counterparts of keep and drop - confirm.
soepkeep if _merge==3 
soepdrop _merge

* Re-declare the panel structure after the merge
xtset pid syear

**Finish Altruism measure based on actual donations 
* Donated amount as a share of three income measures. The ratio is left
* missing when the income measure is missing or zero.
* Suffixes and income variables are paired by position.
local alt_suffixes hh lhh linc
local alt_incomes n_hh_income hh_labour_income ind_labour_income
forvalues k = 1/3 {
	local sfx : word `k' of `alt_suffixes'
	local inc : word `k' of `alt_incomes'
	gen altruism_`sfx' = (money_donated / `inc') if !(`inc'==. | `inc'==0)
}


**Create a single variable for the date, month, and year of the interview
* year: the survey year, replaced by iyear only where iyear equals 2011.
gen year = syear 
replace year = iyear if iyear==2011
* month and day: start from the adult interview fields (pmonin, ptagin) and let
* the youth interview fields (imonth, iday) take over where they hold a valid
* calendar value. The youth fields are range-checked like the adult ones, so a
* negative code in imonth or iday can no longer overwrite a valid adult value
* and make mdy() return missing.
gen month = pmonin if inrange(pmonin,1,12)
replace month = imonth if inrange(imonth,1,12)
gen day = ptagin if inrange(ptagin,1,31)
replace day = iday if inrange(iday,1,31)
* mdy() returns missing when any component is missing or the date does not exist
gen intdate = mdy(month,day,year)
format intdate %td


**Create seasonal dummies 
* quarter is 1 to 4 for months 1-3, 4-6, 7-9 and 10-12, and missing when
* month is outside 1-12 or missing.
gen quarter = cond(inrange(month,1,3), 1, cond(inrange(month,4,6), 2, cond(inrange(month,7,9), 3, cond(inrange(month,10,12), 4, 0)))) if inrange(month,1,12)


**Political variables
* Every value of -1 or below in the four political items is set to missing
foreach polvar in plh0007 plh0011_h plh0012_h plh0013_h {
	replace `polvar' = . if `polvar' <= -1
}

*Interested in politics?
* Indicator for answers 1 or 2
gen pol_interest = inlist(plh0007,1,2) if !missing(plh0007)

*Lean to any party?
* Indicator for answer 1
gen pol_lean = (plh0011_h==1) if !missing(plh0011_h)

*Lean to which party?
* One indicator per party or coalition; names and plh0012_h code lists are
* paired by position. Indicators are missing where plh0012_h is missing.
local party_names CDU FDP CDUFDP CDUSPD SPD GRUNE SPDGRUNE DIELINKE R2G AfD
local party_codes 2,3,13 4 14,22 10 1 5 9 6 17 27
forvalues k = 1/10 {
	local pname : word `k' of `party_names'
	local pcodes : word `k' of `party_codes'
	gen `pname' = inlist(plh0012_h,`pcodes') if !missing(plh0012_h)
}


**Generate Incumbent
* Incumbent sums the party-leaning indicators of the parties listed for the
* period in which the interview took place. Each replace only fills
* observations that are still missing, so an observation receives the value of
* the first cut-off date on or after its interview date. Observations with a
* missing intdate, or with missing party indicators, stay missing.
gen Incumbent = .
label variable Incumbent "Incumbent"

        //BW  (NOTE(review): presumably Bundestagswahl, i.e. federal election dates - confirm)
        replace Incumbent = ( SPD + FDP )                        if Incumbent==. & intdate<= td(05oct1980)
        replace Incumbent = ( CDU + FDP + CDUFDP )               if Incumbent==. & intdate<= td(06mar1983)
        replace Incumbent = ( CDU + FDP + CDUFDP )               if Incumbent==. & intdate<= td(25jan1987)
        replace Incumbent = ( CDU + FDP + CDUFDP )               if Incumbent==. & intdate<= td(02dec1990)
        replace Incumbent = ( CDU + FDP + CDUFDP )               if Incumbent==. & intdate<= td(16oct1994)
        replace Incumbent = ( CDU + FDP + CDUFDP )               if Incumbent==. & intdate<= td(27sep1998)
        replace Incumbent = ( SPD + GRUNE + SPDGRUNE )            if Incumbent==. & intdate<= td(22sep2002)
        replace Incumbent = ( SPD + GRUNE + SPDGRUNE )            if Incumbent==. & intdate<= td(18sep2005)
        replace Incumbent = ( CDU + SPD + CDUSPD)                if Incumbent==. & intdate<= td(27sep2009)
        replace Incumbent = ( CDU + FDP + CDUFDP )               if Incumbent==. & intdate<= td(22sep2013)
        replace Incumbent = ( CDU + SPD + CDUSPD )               if Incumbent==. & intdate<= td(24sep2017)
        * Last cut-off corrected to 26 Sep 2021, the date of the 2021 federal election
        replace Incumbent = ( CDU + SPD + CDUSPD )               if Incumbent==. & intdate<= td(26sep2021)

		
**Generate Established opposition
* Built in the same way as Incumbent, using the same cut-off dates, from the
* parties listed as opposition for each period.
gen Est_opp = .
label variable Est_opp "Established opposition"

        //BW  (NOTE(review): presumably Bundestagswahl, i.e. federal election dates - confirm)
        replace Est_opp = ( CDU + GRUNE + DIELINKE )                     if Est_opp==. & intdate<= td(05oct1980)
        replace Est_opp = ( SPD + GRUNE + DIELINKE + SPDGRUNE + R2G )    if Est_opp==. & intdate<= td(06mar1983)
        replace Est_opp = ( SPD + GRUNE + DIELINKE + SPDGRUNE + R2G)     if Est_opp==. & intdate<= td(25jan1987)
        replace Est_opp = ( SPD + GRUNE + DIELINKE + SPDGRUNE + R2G)     if Est_opp==. & intdate<= td(02dec1990)
        replace Est_opp = ( SPD + GRUNE + DIELINKE + SPDGRUNE + R2G)     if Est_opp==. & intdate<= td(16oct1994)
        replace Est_opp = ( SPD + GRUNE + DIELINKE + SPDGRUNE + R2G)     if Est_opp==. & intdate<= td(27sep1998)
        replace Est_opp = ( CDU + FDP + DIELINKE )                       if Est_opp==. & intdate<= td(22sep2002)
        replace Est_opp = ( CDU + FDP + DIELINKE )                       if Est_opp==. & intdate<= td(18sep2005)
        replace Est_opp = ( FDP + GRUNE + DIELINKE )                     if Est_opp==. & intdate<= td(27sep2009)
        replace Est_opp = ( SPD + GRUNE + DIELINKE + SPDGRUNE + R2G)     if Est_opp==. & intdate<= td(22sep2013)
        replace Est_opp = ( FDP + GRUNE + DIELINKE )                     if Est_opp==. & intdate<= td(24sep2017)
        * Last cut-off corrected to 26 Sep 2021, the date of the 2021 federal election
        replace Est_opp = ( FDP + GRUNE + DIELINKE + AfD)                if Est_opp==. & intdate<= td(26sep2021)

save "${mydata_user}airpollution_full.dta", replace

*------------------------------------------------------------------------------*

*** Combine the SOEP dataset with Kreis-information 
* Adds the household-level regional file to the person-level data, matching
* many persons to one household-year record on hid and syear.

use "${mydata_user}airpollution_full.dta", clear
* intdate is renamed because the later pollution/weather merge uses date1 as a key
rename intdate date1
* NOTE(review): no keep() option is given, so household-years that exist only
* in regionl are retained as rows without person data - confirm this is intended.
merge m:1 hid syear using "${soep36}/regionl"

*Generate a variable that is 1 if a person lives in a community with >50k population
* city_dummy is 1 for ggk values 5-7, 0 for values 1-4, and missing otherwise
gen city_dummy = inrange(ggk,5,7) if inrange(ggk,1,7)
soepdrop _merge
save "${mydata_user}soep_kreise.dta", replace

*------------------------------------------------------------------------------*

*** Combine the outcome dataset with air pollution and weather data 
* Attaches the pollution and weather measures by interview date and county key.
* Only master observations are kept, whether matched or not.

use "${mydata_user}soep_kreise.dta", clear

merge m:1 date1 kkz_rek using "${mydata_user}ror_kreis_pollution_weather.dta", nogen keep(master match)
/*
The dataset ror_kreis_pollution_weather.dta contains county-by-day observations on pollution and
weather variables. This can be created from the raw pollution and weather datasets using the procedure 
employed in 2_merge.do based on the inverse distance weighted average function specified in
distanceprog.do
*/

* Binary indicators: 0 for values from 0 to 20, 1 for values above 20
foreach pollutant in pm10 ma07_pm10 {
	gen `pollutant'_binary = cond(inrange(`pollutant',0,20), 0, cond(`pollutant'>20 & `pollutant'!=., 1, .))
}

* Five-category versions. Where recode rules overlap at a boundary the first
* matching rule applies, so the boundary values 15, 20, 25 and 30 fall into
* the lower category.
foreach pollutant in pm10 ma07_pm10 {
	gen `pollutant'_dummy = `pollutant'
	recode `pollutant'_dummy (min/15=0) (15/20=1) (20/25=2) (25/30=3) (30/max=4)
}

summarize pm10 pm10_dummy ma07_pm10 ma07_pm10_dummy o3 ma07_o3 

*------------------------------------------------------------------------------*

*** Generate principal component of negative emotions
* Runs a principal component analysis on the four adult affect indicators and
* stores the score on the first component as pc1.
* NOTE(review): happy_dummy is part of the input although the heading refers to
* negative emotions - confirm that this is intended.
pca angry_dummy worried_dummy happy_dummy sad_dummy
predict pc1, score

*------------------------------------------------------------------------------*

* Final analysis dataset
save "${mydata_user}airpollution_analysis", replace

*------------------------------------------------------------------------------*

* Empty the memory and end the do-file
clear

exit
